# Import and clean old damage assessments for pre-2013 CA fires.
pacman::p_load(tidyr, readxl, data.table, sf)

source("code/globals.R")

# Create an empty data.table in the correct format
cleandata <- data.table(matrix(data = NA, nrow = 0, ncol = 9))
cleandata <- data.table(
  State = character(), FIPS = character(), APN = character(), APNSeq = numeric(),
  Address = character(), FireName = character(), FireDate = character(),
  DamageLevel = character(), NotesField = character(), howmanystructures = numeric()
)
cleandata$FireDate <- as.Date(cleandata$FireDate)
cleandata$DamageLevel <- factor(cleandata$DamageLevel, levels = c("Unknown", "NoDamage", "Minor", "Destroyed"), ordered = TRUE)

# Function to convert coordinates in the form of "DD MM.MMM" to "DD.DDDD" where the former is space delimited, and an arbitrary number of digits.
# This could alternatively be accomplished by loading other packages
degreesminutes_to_degrees <- function(string) {
  whole_degrees <- strsplit(string, " ")[[1]][[1]] %>% as.numeric()
  decimal_degrees <- strsplit(string, " ")[[1]][[2]] %>% as.numeric() / 60
  return(whole_degrees + decimal_degrees)
}

# Jones (1999) ----

jones <- read_xls(file.path(INPUT_PUBLIC, "DamageData", "NotDINS", "CAnotDINS", "Jones_1999", "Jones_DamageWB.xls")) %>%
  filter(grepl("Home", Struct_Church)) %>%
  mutate(
    State = "CA",
    FIPS = "06089",
    APN = as.character(APN),
    Address = sprintf("%s %s", ResStreetNo, ResStreetName),
    FireName = "Jones",
    FireDate = as.Date("1999-10-13")
  ) %>%
  mutate(DamageLevel = case_when(
    Condition == "DAM" ~ "Minor",
    Condition == "DES" ~ "Destroyed",
    Condition == "SAV" ~ "NoDamage",
    TRUE ~ "Unknown"
  )) %>%
  mutate(DamageLevel = factor(DamageLevel, levels(cleandata$DamageLevel), ordered = T)) %>%
  rename(reported_lat = Latitude, reported_lon = Longitude)

# Remove duplicates by address
jones <- jones %>%
  group_by(Address) %>%
  mutate(howmanystructures = n()) %>%
  ungroup() %>%
  arrange(Address, desc(DamageLevel)) %>% # Arrange so that "Destroyed" comes first
  distinct(Address, .keep_all = T) %>%
  select(State, FIPS, APN, Address, FireName, FireDate, DamageLevel, howmanystructures, reported_lat, reported_lon)

# Grass Valley AKA Arrowhead (2007) ----
grassvalley <- read_xlsx(file.path(INPUT_PUBLIC, "DamageData", "NotDINS", "CAnotDINS", "GrassValley_2007", "Arrowhead.xlsx")) %>%
  rename("Suffix" = starts_with("Su") & ends_with("x")) %>% # Fix character formatting issue - a 'ligature' of ffi in suffix
  mutate(
    State = "CA",
    FIPS = "06071",
    APN = as.character(APN),
    Address = sprintf("%s %s %s, %s", `Street #`, `Street Name`, `Suffix`, `City/Area`),
    FireName = "Grass Valley",
    FireDate = as.Date("2007-10-22"),
    reported_lat = NA,
    reported_lon = NA
  ) %>%
  mutate(DamageLevel = case_when(
    Damage == "Partial" ~ "Minor",
    Damage == "Total" ~ "Destroyed",
    TRUE ~ NA_character_
  )) %>%
  mutate(DamageLevel = factor(DamageLevel, levels(cleandata$DamageLevel), ordered = T))

# Remove duplicates by APN
grassvalley <- grassvalley %>%
  group_by(Address) %>%
  mutate(howmanystructures = n()) %>%
  ungroup() %>%
  arrange(APN, desc(DamageLevel)) %>% # Arrange so that "Destroyed comes first"
  distinct(APN, .keep_all = T) %>%
  select(State, FIPS, APN, Address, FireName, FireDate, DamageLevel, howmanystructures, reported_lat, reported_lon)



# Summit (2008) ----
summit <- read_xls(file.path(INPUT_PUBLIC, "DamageData", "NotDINS", "CAnotDINS", "Summit_2008", "Summit Fire Damage Assessment Master Spreadsheet.xls"), sheet = "Structures") %>%
  filter(`Structure Type` == "Dwelling") %>%
  mutate(
    State = "CA",
    FIPS = "06087",
    APN = APN,
    Address = `Situs Address`,
    FireName = "Summit",
    FireDate = as.Date("2008-05-22")
  ) %>%
  mutate(DamageLevel = case_when(
    `Percent of Damage` == 0 ~ "NoDamage",
    `Percent of Damage` <= 50 ~ "Minor",
    `Percent of Damage` > 50 ~ "Destroyed",
    TRUE ~ "Unknown"
  )) %>%
  mutate(DamageLevel = factor(DamageLevel, levels(cleandata$DamageLevel), ordered = T))

summit$reported_lat <- summit$Latitude %>%
  lapply(degreesminutes_to_degrees) %>%
  as.numeric() # Convert latitude to degrees rather than degrees minutes
summit$reported_lon <- summit$Longitude %>%
  lapply(degreesminutes_to_degrees) %>%
  as.numeric() # Convert longitude to degrees rather than degrees minutes

# Drop duplicates, don't drop missing APNs
summit <- summit %>%
  group_by(Address) %>%
  mutate(howmanystructures = n()) %>%
  ungroup() %>%
  arrange(APN, desc(DamageLevel)) %>% # Arrange so that "Destroyed" comes first
  distinct(APN, .keep_all = T) %>%
  select(State, FIPS, APN, Address, FireName, FireDate, DamageLevel, howmanystructures, reported_lat, reported_lon)


# Trabing (2008) ----
trabing <- read_xls(file.path(INPUT_PUBLIC, "DamageData", "NotDINS", "CAnotDINS", "Trabing_2008", "Part 3- Spreadsheet.xls")) %>%
  filter(`Property Type` == "Dwelling") %>%
  mutate(
    State = "CA",
    FIPS = "06087",
    Address = `Situs Address`,
    FireName = "Trabing",
    FireDate = as.Date("2008-06-20"),
    `Percent of Damage` = as.numeric(`Percent of Damage`)
  ) %>%
  mutate(DamageLevel = case_when(
    `Percent of Damage` == 0 ~ "NoDamage",
    `Percent of Damage` <= 50 ~ "Minor",
    `Percent of Damage` > 50 ~ "Destroyed",
    TRUE ~ "Unknown"
  )) %>%
  mutate(DamageLevel = factor(DamageLevel, levels(cleandata$DamageLevel), ordered = T))

trabing$reported_lat <- trabing$Latitude %>%
  lapply(degreesminutes_to_degrees) %>%
  as.numeric() # Convert latitude to degrees rather than degrees minutes
trabing$reported_lon <- trabing$Longitude %>%
  lapply(degreesminutes_to_degrees) %>%
  as.numeric() # Convert longitude to degrees rather than degrees minutes


trabing <- trabing %>%
  group_by(Address) %>%
  mutate(howmanystructures = n()) %>%
  ungroup() %>%
  arrange(APN, desc(DamageLevel)) %>% # Arrange so that "Destroyed" comes first
  distinct(APN, .keep_all = T) %>%
  select(State, FIPS, APN, Address, FireName, FireDate, DamageLevel, howmanystructures, reported_lat, reported_lon)

# Martin (2008) ----

martin <- read_xlsx(file.path(INPUT_PUBLIC, "DamageData", "NotDINS", "CAnotDINS", "Martin_2008", "MartinManuallyEnteredHomesDamageData.xlsx")) %>%
  filter(`Property Type` == "Dwelling") %>%
  mutate(
    State = "CA",
    FIPS = "06087",
    Address = `Situs Address`,
    APN = APN,
    FireName = "Martin",
    FireDate = as.Date("2008-06-11")
  ) %>%
  mutate(DamageLevel = case_when(
    `Percent of Damage` == 0 ~ "NoDamage",
    `Percent of Damage` <= 50 ~ "Minor",
    `Percent of Damage` > 50 ~ "Destroyed",
    TRUE ~ "Unknown"
  )) %>%
  mutate(DamageLevel = factor(DamageLevel, levels(cleandata$DamageLevel), ordered = T))

martin$reported_lat <- martin$Latitude %>%
  lapply(degreesminutes_to_degrees) %>%
  as.numeric() # Convert latitude to degrees rather than degrees minutes
martin$reported_lon <- martin$Longitude %>%
  lapply(degreesminutes_to_degrees) %>%
  as.numeric() # Convert longitude to degrees rather than degrees minutes



martin <- martin %>%
  group_by(Address) %>%
  mutate(howmanystructures = n()) %>%
  ungroup() %>%
  arrange(APN, desc(DamageLevel)) %>% # Arrange so that "Destroyed" comes first
  distinct(APN, .keep_all = T) %>%
  select(State, FIPS, APN, Address, FireName, FireDate, DamageLevel, howmanystructures, reported_lat, reported_lon)

# Humdboldt (2008) ----

humboldt <- read_xlsx(file.path(INPUT_PUBLIC, "DamageData", "NotDINS", "CAnotDINS", "Humboldt_2008", "Humboldt_DB_Converted.xlsx")) %>%
  filter(`6StructureType` %in% c("house", "mobile")) %>%
  mutate(
    State = "CA",
    FIPS = "06007",
    Address = sprintf("%s %s", `1AddressNo`, `2StreetName`),
    APN = `APN_BLDG_F`,
    FireName = "Humboldt",
    FireDate = as.Date("2008-06-11"),
    reported_lat = NA,
    reported_lon = NA
  ) %>%
  mutate(DamageLevel = case_when(
    `13DamageStatus` %in% c("des", "maj") ~ "Destroyed",
    `13DamageStatus` %in% c("min") ~ "Minor",
    `13DamageStatus` %in% c("sav", "sup") ~ "NoDamage",
    TRUE ~ "Unknown"
  )) %>%
  mutate(DamageLevel = factor(DamageLevel, levels(cleandata$DamageLevel), ordered = T))

humboldt <- humboldt %>%
  group_by(Address) %>%
  mutate(howmanystructures = n()) %>%
  ungroup() %>%
  arrange(APN, desc(DamageLevel)) %>% # Arrange so that "Destroyed" comes first
  distinct(APN, .keep_all = T) %>%
  select(State, FIPS, APN, Address, FireName, FireDate, DamageLevel, howmanystructures, reported_lat, reported_lon)

# Ophir (2008) ----
ophir <- read_csv(file.path(INPUT_PUBLIC, "DamageData", "NotDINS", "CAnotDINS", "Ophir_2008", "OphirManuallyEnteredHomesDamageData.csv")) %>%
  mutate(
    State = "CA",
    FIPS = "06007",
    Address = sprintf("%s %s", address_num, address_road),
    APN = as.character(apn),
    FireName = "Ophir",
    FireDate = as.Date("2008-06-11"),
    reported_lat = NA,
    reported_lon = NA
  ) %>%
  mutate(DamageLevel = case_when(
    condition == "Destroyed" ~ "Destroyed",
    condition == "Minor damage" ~ "Minor",
    condition == "Undamaged" ~ "NoDamage",
    TRUE ~ "Unknown"
  )) %>%
  mutate(DamageLevel = factor(DamageLevel, levels(cleandata$DamageLevel), ordered = T))

ophir <- ophir %>%
  filter(`struct_type` %in% c("house", "mobile")) %>%
  group_by(Address) %>%
  mutate(howmanystructures = n()) %>%
  ungroup() %>%
  arrange(APN, desc(DamageLevel)) %>% # Arrange so that "Destroyed" comes first
  distinct(APN, .keep_all = T) %>%
  select(State, FIPS, APN, Address, FireName, FireDate, DamageLevel, howmanystructures, reported_lat, reported_lon)

# 49er (2009) ----
f49er <- read_xls(file.path(INPUT_PUBLIC, "DamageData", "NotDINS", "CAnotDINS", "49er_2009", "Damage Assessment-Property-Auburn_49_Fire_2009.xls")) %>%
  filter(Classification %in% c("Residence", "Mobile Home", "Residence on Commercial Land")) %>%
  mutate(
    State = "CA",
    FIPS = "06061",
    APN = `APN #`,
    Address = `PROPERTY ADDRESS`,
    FireName = "49er",
    FireDate = as.Date("2009-08-30"),
    reported_lat = NA,
    reported_lon = NA
  ) %>%
  mutate(DamageLevel = case_when(
    STATUS == "Saved" ~ "NoDamage",
    STATUS %in% c("Superficial Damage", "Minor Damage") ~ "Minor",
    STATUS %in% c("Destroyed", "Major Damage") ~ "Destroyed",
    TRUE ~ NA_character_
  )) %>%
  mutate(DamageLevel = factor(DamageLevel, levels(cleandata$DamageLevel), ordered = T))

# Drop duplicated APNs and return chosen fields (don't drop missing APNs but it doesn't look like there are any anyway)
f49er <- f49er %>%
  group_by(Address) %>%
  mutate(howmanystructures = n()) %>%
  ungroup() %>%
  arrange(APN, desc(DamageLevel)) %>% # Arrange so that "Destroyed" comes first
  distinct(APN, .keep_all = T) %>%
  select(State, FIPS, APN, Address, FireName, FireDate, DamageLevel, howmanystructures, reported_lat, reported_lon)

# Lockheed (2009) ----
# Load data, keeping only dwellings
lockheed <- read_xls(file.path(INPUT_PUBLIC, "DamageData", "NotDINS", "CAnotDINS", "Lockheed_2009", "Lockheed DINS Data 8-22-09.xls"),
  sheet = "Lockheed Fire Damage Assessment"
) %>%
  filter(!is.na(APN) & `Property Type` == "Dwelling") %>%
  mutate(
    State = "CA",
    FIPS = "06087",
    Address = `Situs Address`,
    FireName = "Lockheed",
    FireDate = as.Date("2009-08-12")
  ) %>%
  mutate(DamageLevel = case_when(
    `Percent of Damage` == 0 ~ "NoDamage",
    `Percent of Damage` == 100 ~ "Destroyed",
    TRUE ~ "Unknown"
  )) %>%
  mutate(DamageLevel = factor(DamageLevel, levels(cleandata$DamageLevel), ordered = T))

## Cleanup on geographic coordinates:
# Correct one coord set that is formatted XX.XX.XXX instead of XX XX.XXX.
## Drop 2 structures with NA geographic coords. Neither is reported destroyed (one survived, one has NA damage).
lockheed <- lockheed %>%
  mutate(
    Latitude = if_else(Latitude == "37.02.840", "37 02.840", Latitude),
    Longitude = if_else(Longitude == "122.12.953", "122 12.953", Longitude)
  ) %>%
  filter(!is.na(Latitude))

lockheed <- lockheed %>%
  mutate(
    reported_lat = degreesminutes_to_degrees(Latitude),
    reported_lon = degreesminutes_to_degrees(Longitude)
  )

# Save needed fields, drop duplicates
lockheed <- lockheed %>%
  group_by(Address) %>%
  mutate(howmanystructures = n()) %>%
  ungroup() %>%
  arrange(APN, desc(DamageLevel)) %>% # Arrange so that "Destroyed" comes first
  distinct(APN, .keep_all = T) %>%
  select(State, FIPS, APN, Address, FireName, FireDate, DamageLevel, howmanystructures, reported_lat, reported_lon)


# Canyon (2011) ----

canyon <- read_xls(file.path(INPUT_PUBLIC, "DamageData", "NotDINS", "CAnotDINS", "Canyon_2011", "Damage Assessment Index Spreadsheet Blank_KevinManuallyChecked.xls")) %>%
  filter(IsHome == 1) %>%
  mutate(
    State = "CA",
    FIPS = "06029",
    Address = sprintf("%s, %s", Address, City),
    APN = as.character(APN),
    FireName = "Canyon",
    FireDate = as.Date("2011-09-04")
  ) %>%
  mutate(DamageLevel = case_when(
    Destroyed == 1 ~ "Destroyed",
    Destroyed == 0 ~ "NoDamage",
    TRUE ~ "Unknown"
  )) %>%
  mutate(DamageLevel = factor(DamageLevel, levels(cleandata$DamageLevel), ordered = T))

# Manually reformat the weird way that the coordinates were formatted in the source data
canyon$Latitude <- paste(
  canyon$`GPS Location` %>% lapply(substr, 2, 3),
  canyon$`GPS Location` %>% lapply(substr, 6, 10)
)
canyon$Longitude <- paste(
  canyon$`GPS Location` %>% lapply(substr, 14, 16),
  canyon$`GPS Location` %>% lapply(substr, 19, 30)
)
canyon$reported_lat <- canyon$Latitude %>%
  lapply(degreesminutes_to_degrees) %>%
  as.numeric() # Convert latitude to degrees rather than degrees minutes
canyon$reported_lon <- canyon$Longitude %>%
  lapply(degreesminutes_to_degrees) %>%
  as.numeric() # Convert longitude to degrees rather than degrees minutes


canyon <- canyon %>%
  group_by(Address) %>%
  mutate(howmanystructures = n()) %>%
  ungroup() %>%
  arrange(Address, desc(DamageLevel)) %>% # Arrange so that "Destroyed" comes first
  distinct(Address, .keep_all = T) %>%
  select(State, FIPS, APN, Address, FireName, FireDate, DamageLevel, howmanystructures, reported_lat, reported_lon) %>%
  mutate(NotesField = "No APNs included and some addresses missing, may not include all destroyed structures.")

# Note that they didn't include any APNs and many destroyed structures had no addresses. So we flag this data as a bit suspect.

# Ponderosa (2012) ----

ponderosa <- read_csv(file.path(INPUT_PUBLIC, "DamageData", "NotDINS", "CAnotDINS", "Ponderosa_2012", "PonderosaParsedHomesData-v2.csv")) %>%
  mutate(
    State = "CA",
    FIPS = sprintf("%05d", as.numeric(fips)),
    Address = address,
    APN = apn,
    FireName = "Ponderosa",
    FireDate = as.Date("2012-08-18")
  ) %>%
  mutate(DamageLevel = case_when(
    num_destroyed_res > 0 ~ "Destroyed",
    num_destroyed_res == 0 ~ "NoDamage",
    TRUE ~ "Unknown"
  )) %>%
  mutate(DamageLevel = factor(DamageLevel, levels(cleandata$DamageLevel), ordered = T)) %>%
  rename(reported_lat = lat, reported_lon = lon)

ponderosa <- ponderosa %>%
  filter(!(APN %in% c("ROW", "NO DATA"))) %>%
  group_by(Address) %>%
  mutate(howmanystructures = n()) %>%
  ungroup() %>%
  arrange(APN, desc(DamageLevel)) %>% # Arrange so that "Destroyed" comes first
  distinct(APN, .keep_all = T) %>%
  select(State, FIPS, APN, Address, FireName, FireDate, DamageLevel, howmanystructures, reported_lat, reported_lon)

# Shockey (2012) ----

shockey <- read_csv(file.path(INPUT_PUBLIC, "DamageData", "NotDINS", "CAnotDINS", "Shockey_2012", "ShockeyManuallyEnteredHomesDamageData.csv")) %>%
  mutate(
    State = "CA",
    FIPS = "06073",
    APN = as.character(apn),
    Address = address,
    FireName = "Shockey",
    FireDate = as.Date("2012-09-23"),
    reported_lat = NA,
    reported_lon = NA
  ) %>%
  mutate(DamageLevel = case_when(
    destroyed == 1 ~ "Destroyed",
    comments == "minor damage" ~ "Minor",
    destroyed == 0 ~ "NoDamage",
    TRUE ~ "Unknown"
  )) %>%
  mutate(DamageLevel = factor(DamageLevel, levels(cleandata$DamageLevel), ordered = T))

shockey <- shockey %>%
  group_by(Address) %>%
  mutate(howmanystructures = n()) %>%
  ungroup() %>%
  arrange(APN, desc(DamageLevel)) %>% # Arrange so that "Destroyed" comes first
  distinct(APN, .keep_all = T) %>%
  select(State, FIPS, APN, Address, FireName, FireDate, DamageLevel, howmanystructures, reported_lat, reported_lon)

# Combine all fires including unused columns to match other damage datasets ----

old_CA_fires <- bind_rows(
  cleandata,
  jones,
  grassvalley,
  summit,
  trabing,
  martin,
  humboldt,
  ophir,
  f49er,
  lockheed,
  canyon,
  ponderosa,
  shockey
)

# Save original address, APN, lon/lat
old_CA_fires <- old_CA_fires %>%
  mutate(
    APN_orig = APN,
    address_orig = Address,
    lon_orig = reported_lon,
    lat_orig = reported_lat,
    in_damage_data = 1
  )

## -- Flag bad/unknown damage records to drop after ZTRAX merge & neighbor identification (so get removed from comparison group too)
old_CA_fires <- old_CA_fires %>%
  mutate(badrecordflag = case_when(
    DamageLevel == "Unknown" ~ TRUE,
    !is.na(howmanystructures) & howmanystructures > 1 ~ TRUE,
    TRUE ~ FALSE
  )) %>%
  # Rename some things for consistency with DINS, so that I can process this with same functions that do DINS further down the pipe.
  dplyr::rename(UnformattedAssessorParcelNumber = APN) %>%
  dplyr::rename(INCIDENTNAME = FireName) %>%
  dplyr::rename(DAMAGE = DamageLevel) %>%
  dplyr::mutate(DAMAGE = as.character(DAMAGE)) %>%
  dplyr::rename(date = FireDate) %>%
  dplyr::mutate(date = as.Date(date)) %>%
  dplyr::mutate(FIPS = as.numeric(FIPS)) %>%
  dplyr::mutate(INCIDENTNUM = paste(INCIDENTNAME, FIPS, year(date), sep = "_")) %>%
  dplyr::mutate(
    STRUCTURETYPE = "NotReported", badstructuretype = FALSE,
    ROOFCONSTRUCTION = "NotReported", EAVES = "NotReported",
    EXTERIORSIDING = "NotReported", WINDOWPANE = "NotReported",
    VENTSCREEN = "NotReported", DECKPORCHONGRADE = "NotReported", DECKPORCHELEVATED = "NotReported",
    PATIOCOVERCARPORT = "NotReported", FENCEATTACHEDTOSTRUCTURE = "NotReported"
  )

summary(old_CA_fires)

## Save a lists of counties, so I know which counties to import from ZTRAX
saveRDS(unique(as.numeric(old_CA_fires$FIPS)), file.path(WORKING, "counties_in_older_CA_fires.Rds"))

## Save geographic coordinates for use downstream
old_CA_fires %>%
  select(State, FIPS, UnformattedAssessorParcelNumber, APNSeq, Address, INCIDENTNAME, reported_lat, reported_lon) %>%
  saveRDS(file.path(WORKING, "damage_CA_older_coords.Rds"))

## Drop the Canyon Fire, which has no APNs reported
old_CA_fires <- old_CA_fires %>%
  dplyr::filter(INCIDENTNUM != "Canyon_6029_2011")

## Save without geographic coordinates (for consistency of var names with other damage import datasets)
old_CA_fires %>%
  dplyr::select(-reported_lat, -reported_lon) %>%
  saveRDS(file.path(WORKING, "damage_CA_older.Rds"))
